home *** CD-ROM | disk | FTP | other *** search
/ Personal Computer World 2009 February / PCWFEB09.iso / Software / Linux / Kubuntu 8.10 / kubuntu-8.10-desktop-i386.iso / casper / filesystem.squashfs / usr / lib / python2.5 / email / feedparser.pyc (.txt) < prev    next >
Python Compiled Bytecode  |  2008-10-29  |  11KB  |  453 lines

  1. # Source Generated with Decompyle++
  2. # File: in.pyc (Python 2.5)
  3.  
  4. """FeedParser - An email feed parser.
  5.  
  6. The feed parser implements an interface for incrementally parsing an email
  7. message, line by line.  This has advantages for certain applications, such as
  8. those reading email messages off a socket.
  9.  
  10. FeedParser.feed() is the primary interface for pushing new data into the
  11. parser.  It returns when there's nothing more it can do with the available
  12. data.  When you have no more data to push into the parser, call .close().
  13. This completes the parsing and returns the root message object.
  14.  
  15. The other advantage of this parser is that it will never throw a parsing
  16. exception.  Instead, when it finds something unexpected, it adds a 'defect' to
  17. the current message.  Defects are just instances that live on the message
  18. object's .defects attribute.
  19. """
# Only FeedParser is exported; BufferedSubFile is an implementation detail.
__all__ = [
    'FeedParser']
import re
from email import errors
from email import message
# Any single line terminator (CRLF, bare CR or bare LF).  Used with .match()
# to recognise a line that starts with a terminator, i.e. a blank line.
NLCRE = re.compile('\r\n|\r|\n')
# Same terminator, captured in a group; used with .match() to strip one
# leading line terminator from the first epilogue line.
NLCRE_bol = re.compile('(\r\n|\r|\n)')
# A single line terminator anchored at the end of the string; used with
# .search() to strip one trailing terminator from preambles and payloads.
NLCRE_eol = re.compile('(\r\n|\r|\n)$')
# Terminator in a capturing group so that re.split() keeps the terminators
# in its result; used to crack pushed data into individual lines.
NLCRE_crack = re.compile('(\r\n|\r|\n)')
# Matches the start of a header-section line: a Unix-From envelope line
# ("From "), a field name made of the characters \041-\071 and \073-\176
# (printable ASCII without the colon, \072) followed by a colon, or leading
# whitespace (a continuation line).
headerRE = re.compile('^(From |[\\041-\\071\\073-\\176]{1,}:|[\\t ])')
EMPTYSTRING = ''
NL = '\n'
# Unique sentinel returned by BufferedSubFile.readline() (and yielded by the
# parser generator) when no complete line is buffered and the input has not
# been closed yet.
NeedMoreData = object()
  33.  
  34. class BufferedSubFile(object):
  35.     '''A file-ish object that can have new data loaded into it.
  36.  
  37.     You can also push and pop line-matching predicates onto a stack.  When the
  38.     current predicate matches the current line, a false EOF response
  39.     (i.e. empty string) is returned instead.  This lets the parser adhere to a
  40.     simple abstraction -- it parses until EOF closes the current message.
  41.     '''
  42.     
  43.     def __init__(self):
  44.         self._partial = ''
  45.         self._lines = []
  46.         self._eofstack = []
  47.         self._closed = False
  48.  
  49.     
  50.     def push_eof_matcher(self, pred):
  51.         self._eofstack.append(pred)
  52.  
  53.     
  54.     def pop_eof_matcher(self):
  55.         return self._eofstack.pop()
  56.  
  57.     
  58.     def close(self):
  59.         self._lines.append(self._partial)
  60.         self._partial = ''
  61.         self._closed = True
  62.  
  63.     
  64.     def readline(self):
  65.         if not self._lines:
  66.             if self._closed:
  67.                 return ''
  68.             
  69.             return NeedMoreData
  70.         
  71.         line = self._lines.pop()
  72.         for ateof in self._eofstack[::-1]:
  73.             if ateof(line):
  74.                 self._lines.append(line)
  75.                 return ''
  76.                 continue
  77.         
  78.         return line
  79.  
  80.     
  81.     def unreadline(self, line):
  82.         if not line is not NeedMoreData:
  83.             raise AssertionError
  84.         self._lines.append(line)
  85.  
  86.     
  87.     def push(self, data):
  88.         '''Push some new data into this object.'''
  89.         data = self._partial + data
  90.         self._partial = ''
  91.         parts = NLCRE_crack.split(data)
  92.         self._partial = parts.pop()
  93.         lines = []
  94.         for i in range(len(parts) // 2):
  95.             lines.append(parts[i * 2] + parts[i * 2 + 1])
  96.         
  97.         self.pushlines(lines)
  98.  
  99.     
  100.     def pushlines(self, lines):
  101.         self._lines[:0] = lines[::-1]
  102.  
  103.     
  104.     def is_closed(self):
  105.         return self._closed
  106.  
  107.     
  108.     def __iter__(self):
  109.         return self
  110.  
  111.     
  112.     def next(self):
  113.         line = self.readline()
  114.         if line == '':
  115.             raise StopIteration
  116.         
  117.         return line
  118.  
  119.  
  120.  
  121. class FeedParser:
  122.     '''A feed-style parser of email.'''
  123.     
  124.     def __init__(self, _factory = message.Message):
  125.         '''_factory is called with no arguments to create a new message obj'''
  126.         self._factory = _factory
  127.         self._input = BufferedSubFile()
  128.         self._msgstack = []
  129.         self._parse = self._parsegen().next
  130.         self._cur = None
  131.         self._last = None
  132.         self._headersonly = False
  133.  
  134.     
  135.     def _set_headersonly(self):
  136.         self._headersonly = True
  137.  
  138.     
  139.     def feed(self, data):
  140.         '''Push more data into the parser.'''
  141.         self._input.push(data)
  142.         self._call_parse()
  143.  
  144.     
  145.     def _call_parse(self):
  146.         
  147.         try:
  148.             self._parse()
  149.         except StopIteration:
  150.             pass
  151.  
  152.  
  153.     
  154.     def close(self):
  155.         '''Parse all remaining data and return the root message object.'''
  156.         self._input.close()
  157.         self._call_parse()
  158.         root = self._pop_message()
  159.         if not not (self._msgstack):
  160.             raise AssertionError
  161.         if root.get_content_maintype() == 'multipart' and not root.is_multipart():
  162.             root.defects.append(errors.MultipartInvariantViolationDefect())
  163.         
  164.         return root
  165.  
  166.     
  167.     def _new_message(self):
  168.         msg = self._factory()
  169.         if self._cur and self._cur.get_content_type() == 'multipart/digest':
  170.             msg.set_default_type('message/rfc822')
  171.         
  172.         if self._msgstack:
  173.             self._msgstack[-1].attach(msg)
  174.         
  175.         self._msgstack.append(msg)
  176.         self._cur = msg
  177.         self._last = msg
  178.  
  179.     
  180.     def _pop_message(self):
  181.         retval = self._msgstack.pop()
  182.         if self._msgstack:
  183.             self._cur = self._msgstack[-1]
  184.         else:
  185.             self._cur = None
  186.         return retval
  187.  
  188.     
  189.     def _parsegen(self):
  190.         self._new_message()
  191.         headers = []
  192.         for line in self._input:
  193.             if line is NeedMoreData:
  194.                 yield NeedMoreData
  195.                 continue
  196.             
  197.             if not headerRE.match(line):
  198.                 if not NLCRE.match(line):
  199.                     self._input.unreadline(line)
  200.                 
  201.                 break
  202.             
  203.             headers.append(line)
  204.         
  205.         self._parse_headers(headers)
  206.         if self._headersonly:
  207.             lines = []
  208.             while True:
  209.                 line = self._input.readline()
  210.                 if line is NeedMoreData:
  211.                     yield NeedMoreData
  212.                     continue
  213.                 
  214.                 if line == '':
  215.                     break
  216.                 
  217.                 lines.append(line)
  218.             self._cur.set_payload(EMPTYSTRING.join(lines))
  219.             return None
  220.         
  221.         if self._cur.get_content_type() == 'message/delivery-status':
  222.             while True:
  223.                 self._input.push_eof_matcher(NLCRE.match)
  224.                 for retval in self._parsegen():
  225.                     if retval is NeedMoreData:
  226.                         yield NeedMoreData
  227.                         continue
  228.                     
  229.                     break
  230.                 
  231.                 msg = self._pop_message()
  232.                 self._input.pop_eof_matcher()
  233.                 while True:
  234.                     line = self._input.readline()
  235.                     if line is NeedMoreData:
  236.                         yield NeedMoreData
  237.                         continue
  238.                     
  239.                     break
  240.                 while True:
  241.                     line = self._input.readline()
  242.                     if line is NeedMoreData:
  243.                         yield NeedMoreData
  244.                         continue
  245.                     
  246.                     break
  247.                 if line == '':
  248.                     break
  249.                 
  250.                 self._input.unreadline(line)
  251.             return None
  252.         
  253.         if self._cur.get_content_maintype() == 'message':
  254.             for retval in self._parsegen():
  255.                 if retval is NeedMoreData:
  256.                     yield NeedMoreData
  257.                     continue
  258.                 
  259.                 break
  260.             
  261.             self._pop_message()
  262.             return None
  263.         
  264.         if self._cur.get_content_maintype() == 'multipart':
  265.             boundary = self._cur.get_boundary()
  266.             if boundary is None:
  267.                 self._cur.defects.append(errors.NoBoundaryInMultipartDefect())
  268.                 lines = []
  269.                 for line in self._input:
  270.                     if line is NeedMoreData:
  271.                         yield NeedMoreData
  272.                         continue
  273.                     
  274.                     lines.append(line)
  275.                 
  276.                 self._cur.set_payload(EMPTYSTRING.join(lines))
  277.                 return None
  278.             
  279.             separator = '--' + boundary
  280.             boundaryre = re.compile('(?P<sep>' + re.escape(separator) + ')(?P<end>--)?(?P<ws>[ \\t]*)(?P<linesep>\\r\\n|\\r|\\n)?$')
  281.             capturing_preamble = True
  282.             preamble = []
  283.             linesep = False
  284.             while True:
  285.                 line = self._input.readline()
  286.                 if line is NeedMoreData:
  287.                     yield NeedMoreData
  288.                     continue
  289.                 
  290.                 if line == '':
  291.                     break
  292.                 
  293.                 mo = boundaryre.match(line)
  294.                 if mo:
  295.                     if mo.group('end'):
  296.                         linesep = mo.group('linesep')
  297.                         break
  298.                     
  299.                     if capturing_preamble:
  300.                         if preamble:
  301.                             lastline = preamble[-1]
  302.                             eolmo = NLCRE_eol.search(lastline)
  303.                             if eolmo:
  304.                                 preamble[-1] = lastline[:-len(eolmo.group(0))]
  305.                             
  306.                             self._cur.preamble = EMPTYSTRING.join(preamble)
  307.                         
  308.                         capturing_preamble = False
  309.                         self._input.unreadline(line)
  310.                         continue
  311.                     
  312.                     while True:
  313.                         line = self._input.readline()
  314.                         if line is NeedMoreData:
  315.                             yield NeedMoreData
  316.                             continue
  317.                         
  318.                         mo = boundaryre.match(line)
  319.                         if not mo:
  320.                             self._input.unreadline(line)
  321.                             break
  322.                             continue
  323.                     self._input.push_eof_matcher(boundaryre.match)
  324.                     for retval in self._parsegen():
  325.                         if retval is NeedMoreData:
  326.                             yield NeedMoreData
  327.                             continue
  328.                         
  329.                         break
  330.                     
  331.                     if self._last.get_content_maintype() == 'multipart':
  332.                         epilogue = self._last.epilogue
  333.                         if epilogue == '':
  334.                             self._last.epilogue = None
  335.                         elif epilogue is not None:
  336.                             mo = NLCRE_eol.search(epilogue)
  337.                             if mo:
  338.                                 end = len(mo.group(0))
  339.                                 self._last.epilogue = epilogue[:-end]
  340.                             
  341.                         
  342.                     else:
  343.                         payload = self._last.get_payload()
  344.                         if isinstance(payload, basestring):
  345.                             mo = NLCRE_eol.search(payload)
  346.                             if mo:
  347.                                 payload = payload[:-len(mo.group(0))]
  348.                                 self._last.set_payload(payload)
  349.                             
  350.                         
  351.                     self._input.pop_eof_matcher()
  352.                     self._pop_message()
  353.                     self._last = self._cur
  354.                     continue
  355.                 if not capturing_preamble:
  356.                     raise AssertionError
  357.                 preamble.append(line)
  358.             if capturing_preamble:
  359.                 self._cur.defects.append(errors.StartBoundaryNotFoundDefect())
  360.                 self._cur.set_payload(EMPTYSTRING.join(preamble))
  361.                 epilogue = []
  362.                 for line in self._input:
  363.                     if line is NeedMoreData:
  364.                         yield NeedMoreData
  365.                         continue
  366.                         continue
  367.                 
  368.                 self._cur.epilogue = EMPTYSTRING.join(epilogue)
  369.                 return None
  370.             
  371.             if linesep:
  372.                 epilogue = [
  373.                     '']
  374.             else:
  375.                 epilogue = []
  376.             for line in self._input:
  377.                 if line is NeedMoreData:
  378.                     yield NeedMoreData
  379.                     continue
  380.                 
  381.                 epilogue.append(line)
  382.             
  383.             if epilogue:
  384.                 firstline = epilogue[0]
  385.                 bolmo = NLCRE_bol.match(firstline)
  386.                 if bolmo:
  387.                     epilogue[0] = firstline[len(bolmo.group(0)):]
  388.                 
  389.             
  390.             self._cur.epilogue = EMPTYSTRING.join(epilogue)
  391.             return None
  392.         
  393.         lines = []
  394.         for line in self._input:
  395.             if line is NeedMoreData:
  396.                 yield NeedMoreData
  397.                 continue
  398.             
  399.             lines.append(line)
  400.         
  401.         self._cur.set_payload(EMPTYSTRING.join(lines))
  402.  
  403.     
  404.     def _parse_headers(self, lines):
  405.         lastheader = ''
  406.         lastvalue = []
  407.         for lineno, line in enumerate(lines):
  408.             if line[0] in ' \t':
  409.                 if not lastheader:
  410.                     defect = errors.FirstHeaderLineIsContinuationDefect(line)
  411.                     self._cur.defects.append(defect)
  412.                     continue
  413.                 
  414.                 lastvalue.append(line)
  415.                 continue
  416.             
  417.             if lastheader:
  418.                 lhdr = EMPTYSTRING.join(lastvalue)[:-1].rstrip('\r\n')
  419.                 self._cur[lastheader] = lhdr
  420.                 lastheader = ''
  421.                 lastvalue = []
  422.             
  423.             if line.startswith('From '):
  424.                 if lineno == 0:
  425.                     mo = NLCRE_eol.search(line)
  426.                     if mo:
  427.                         line = line[:-len(mo.group(0))]
  428.                     
  429.                     self._cur.set_unixfrom(line)
  430.                     continue
  431.                 elif lineno == len(lines) - 1:
  432.                     self._input.unreadline(line)
  433.                     return None
  434.                 else:
  435.                     defect = errors.MisplacedEnvelopeHeaderDefect(line)
  436.                     self._cur.defects.append(defect)
  437.             
  438.             i = line.find(':')
  439.             if i < 0:
  440.                 defect = errors.MalformedHeaderDefect(line)
  441.                 self._cur.defects.append(defect)
  442.                 continue
  443.             
  444.             lastheader = line[:i]
  445.             lastvalue = [
  446.                 line[i + 1:].lstrip()]
  447.         
  448.         if lastheader:
  449.             self._cur[lastheader] = EMPTYSTRING.join(lastvalue).rstrip('\r\n')
  450.         
  451.  
  452.  
  453.